
* Imports reverse-geocoded ZIP codes for AQS monitoring stations, matches them to user ZIPs, and builds monitor clusters.


***Inputs:
* $Data/geolist.csv
* $Data/aqs_sites.dta
* $Data/UserZips.dta

***Outputs:
* $Data/geolist.dta
* $Data/aqs_zips.dta
* $Data/aqs_user_zips.dta
* $Data/AQS_clusters_inzip.dta


* import zip codes from reverse geocoding 

clear

import delimited using $Data/geolist.csv 


drop if v2=="V1"
drop v1
gen ind = _n
	
	save $Data/geolist.dta, replace
	
	

** match AQS zips to monitors and users 	
	
 use $Data/aqs_sites.dta, clear
 
 drop if site_close< td(01jan2012)
 
 keep statecode countycode sitenumber latitude longitude 
 
 keep if longitude<. & latitude<. 
 
 drop if latitude ==0 & longitude==0
	
	gen ind = _n 
	
	merge 1:1 ind using $Data/geolist.dta
	
	drop _merge 
	drop ind
	
	* Extract a numeric zipcode from the geocoded address string v2, drop
	* Puerto Rico and Mexico addresses, and save one row per site with its
	* zipcode and a truncated zip key.
	*
	* Step 1: cut v2 at commas into address1, address2, address3, ...
	split v2, gen(address) parse(",")
	
	* Step 2: cut the third comma-delimited piece at spaces into address31,
	* address32, ... and take the second word as the zipcode.
	* NOTE(review): presumably address3 looks like "<state> <zip>" - confirm
	* against the geocoder output.
	split address3
	
	rename address32 zipcode 
	
	* Blank out zipcode for rows 5276 onward in sorted (string) order.
	* NOTE(review): the row number is hard-coded. Presumably it marks where
	* non-numeric values begin for one particular version of the input data -
	* confirm, and re-check whenever geolist.csv or aqs_sites.dta changes.
	sort zipcode
	replace zipcode ="" if _n>=5276
	
	* Convert to numeric. NOTE(review): destring without force is documented
	* to leave a variable as string if any non-numeric characters remain, and
	* the comparison zipcode==. further down needs a numeric variable - verify
	* that this conversion succeeds on the current data.
	destring zipcode, replace	
	
	* Fallback candidate: the five characters that start ten characters from
	* the end of v2.
	* NOTE(review): presumably the address ends in a fixed-width suffix such
	* as ", USA" so that these five characters are the zip - confirm.
	gen zipalt = substr(v2,-10,5)
	
	* Same hard-coded trimming as above, here from row 5221 onward.
	* NOTE(review): data-snapshot dependent - confirm the threshold.
	sort zipalt
	replace zipalt = "" if _n>=5221
	
	* Convert to numeric, telling destring to ignore the listed characters.
	destring zipalt, replace ignore("," "Utah")
	
	* Use the fallback only where the primary zipcode is missing.
	replace zipcode = zipalt if zipcode==.
	
	
	sort zipcode 
	
	* Remove sites whose address text mentions Puerto Rico or Mexico.
	drop if strpos(v2,"Puerto Rico")>0
	drop if strpos(v2,"Mexico")>0

	
	keep statecode countycode sitenumber  zipcode latitude longitude 
	
	* zip is zipcode with its last two digits removed (integer part of
	* zipcode/100). It is the key used to merge with UserZips.dta.
	gen zip = int( zipcode/100)

	
	save $Data/aqs_zips.dta, replace
	
	
	
	* Keep only the monitor sites whose zip key also appears in UserZips.dta.
	* Several sites may share one zip, and unmatched rows from either file
	* are discarded.
	merge m:1 zip using $Data/UserZips.dta, keep(match) nogenerate

	save $Data/aqs_user_zips.dta, replace
	
* Generate monitor clusters.
*
* The sites in memory are grouped into 100 k-means clusters on latitude and
* longitude. Each zip key is then assigned the cluster that occurs most often
* among its sites (maxmode picks the largest cluster id on ties), and one row
* per zip is saved.
*
* The starting centers are k observations drawn at random. The seed is fixed
* inside start(krandom()) so that the cluster ids, and therefore the saved
* file, are the same on every run. The grouping variable is named explicitly
* instead of relying on the automatically chosen _clus_# name.

	cluster kmeans latitude longitude, k(100) start(krandom(20120101)) generate(kmcluster)

	by zip, sort: egen monitorcluster = mode(kmcluster), maxmode

	keep zip monitorcluster

	duplicates drop

	save $Data/AQS_clusters_inzip.dta, replace

	
	
	
	
